from pymongo import MongoClient
import pymongo
# Connect to the MongoDB server on port 27017; no host is passed, so the
# driver's default host is used.
client = MongoClient(port=27017)
# Handle to the 'tiktok' database. The export loop below reads one
# collection per hashtag from it via db[<hashtag>].
db = client['tiktok']
import csv
import os
import json
import numpy as np

# Export script: for every hashtag, collect the ids of MongoDB documents that
# carry all required features, then scan the raw JSON-lines dumps of that
# hashtag and write one TSV row (video id, one-hot hashtag vector, date token
# taken from the dump file name) for every dumped video whose id was collected.

# Hashtags to export. Each entry is used both as a MongoDB collection name and
# as a substring matched against dump file names; its position in this list is
# its index in the one-hot vector.
htlist = [
    'holidayvibes', 'gamenight', 'happyholidays', 'carsoftiktok',
    'fallguysmoments', 'homecooked', 'veteransday', 'youwantmore',
    'interiordesign', 'coldweather', 'wildanimals', 'mycostume', 'meleaving',
    'mypfp', 'catchphrases', 'watchmegrow', 'holidaycrafts', 'growupwithme',
    'clingypet', 'happyhanukkah', 'lunarnewyear',
    'tabletop', 'comfortfood', 'selfimprovement', '2021affirmations',
    'perfectmatch', 'givingszn', 'holidaycountdown', 'bakingszn',
    'holidaymusic', 'familyimpression', 'inkdrawing', 'WeekendVibes',
    'recordsday', 'productivity', 'smallbusiness',
    'falldiy', 'whenwewereyounger', 'yellow', 'ComingOfAge', 'artmas',
    'gaminglife', 'gamingsetup', 'hellowinter', 'planttiktok', 'housetour',
    'neonshadow', 'homeoffice', 'raisedby', 'foodtiktok', 'valentinesday',
    'yougotthis', 'stemlife', 'makeitvogue',
]

# Destination TSV and the directory holding the raw dump files.
_OUTPUT_PATH = 'D:\\Work\\kusuri\\video_htn_ct.tsv'
_TIKTOK_DIR = 'D:\\Work\\Tool\\tiktok\\TikToks\\'
# At most this many lines are read from each dump file.
_MAX_LINES_PER_FILE = 2000
# Keys that must be present in obj['video_feature']['editing'].
_REQUIRED_EDITING_KEYS = (
    'var_sb', 'avg_sticker_length', 'avg_scences', 'var_yamnet',
)


def _has_required_features(obj):
    """Return True if a MongoDB document carries every required feature.

    A document qualifies when its image embedding, text and yamnet audio
    feature are non-empty and all of ``_REQUIRED_EDITING_KEYS`` are present
    in its editing features. A missing parent key raises ``KeyError``.
    """
    video = obj['video_feature']
    # Length checks first, in the same order as the key accesses they imply,
    # so 'editing' is only looked up for documents that pass them.
    if not (len(video['img_embed']) > 0
            and len(obj['text_feature']['text']) > 0
            and len(video['audio']['yamnet']) > 0):
        return False
    editing = video['editing']
    return all(key in editing for key in _REQUIRED_EDITING_KEYS)


def _extract_video_id(record):
    """Return the video id of a parsed dump line, or '' if none is found.

    The id is read from the top-level 'id' key when present, otherwise from
    record['itemInfos']['id'].
    """
    if 'id' in record:
        return record['id']
    if 'itemInfos' in record:
        return record['itemInfos']['id']
    return ''


# Maps each hashtag to its 0-based position in htlist.
htranking = {}

# The dump directory is never written to by this script, so list it once
# instead of once per hashtag.
dump_files = os.listdir(_TIKTOK_DIR)

with open(_OUTPUT_PATH, 'w', newline='\n', encoding='utf-8') as fin:
    writer = csv.writer(fin, delimiter='\t')
    for rank, ht in enumerate(htlist):
        htranking[ht] = rank

        # A set gives O(1) membership tests in the per-line check below.
        # Assumes '_id' values are hashable scalars -- TODO confirm.
        valid_ids = {
            obj['_id'] for obj in db[ht].find() if _has_required_features(obj)
        }

        # One-hot vector at the hashtag's own rank. The index used to be read
        # after the counter had been incremented, which shifted every vector
        # by one slot and raised IndexError on the last hashtag.
        htn = np.zeros(len(htlist))
        htn[rank] = 1

        for file in dump_files:
            # NOTE(review): substring match -- a hashtag also matches any file
            # whose name merely contains it; confirm the naming scheme.
            if ht not in file:
                continue
            # Third '_'-separated token of the file name, minus '.json'.
            dt = file.split('_')[2].replace('.json', '')
            with open(_TIKTOK_DIR + file, 'r', encoding='utf-8',
                      newline='\n') as filename_input:
                for lc, line in enumerate(filename_input):
                    # Check the cap before parsing so the line past the limit
                    # is not decoded needlessly.
                    if lc >= _MAX_LINES_PER_FILE:
                        break
                    k = _extract_video_id(json.loads(line))
                    if k in valid_ids:
                        # NOTE(review): csv.writer stringifies the array with
                        # str(); output format kept unchanged for consumers.
                        writer.writerow([k, htn, dt])